## *****************************************
##      B13 Firm Branches Identification       
##          Hao Zhang & Ye Zhang
##              2025/7/24
## *****************************************

# load packages
library(readstata13)
library(tidyverse)
library(doBy)
library(haven)
library(lfe)
library(stargazer)

# read in processed dta
setwd("../raw data")
df <- read.dta13("cies name 1998-2007.dta")

# assign unified_firm_id
df <- df %>%
  mutate(base_name = str_replace(name, "(.*?)(总|有限责任)?(公司|厂|集团).*", "\\1")) %>%
  # filter(nchar(base_name) >= 4) %>%
  mutate(unified_firm_id = as.integer(as.factor(base_name)))

# force all firm_id to have a unique unified firm id
df <- df %>%
  group_by(frdm) %>%
  mutate(unified_firm_id = first(unified_firm_id)) %>%
  ungroup()

# select rows
df <- df %>% select(frdm, unified_firm_id) %>% distinct()

# create group indicator
df <- df %>%
  group_by(unified_firm_id) %>%
  mutate(is_duplicate_firm = n() > 1) %>%
  ungroup()

# load data
load("../analysis data/firm analysis.RData")

# merge frdm and unified_firm_id
firm <- left_join(firm, df, by = "frdm")

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# unemployment insurance
m1 <- felm(insurance_dummy ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax |
             frdm + year + educ + ethnicity  + gender +
             begin + connection + seq | 0 | citycode + unified_firm_id, data = firm)

firm1 <- firm %>% filter(rate1 > 0)
m2 <- felm(log(rate1) ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax |
             frdm + year + educ + ethnicity  + gender +
             begin + connection + seq | 0 | citycode + unified_firm_id, data = firm1)

# medical insurance
m3 <- felm(medical_dummy ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax |
             frdm + year + educ + ethnicity  + gender +
             begin + connection + seq | 0 | citycode + unified_firm_id, data = firm)

firm2 <- firm %>% filter(rate2 > 0)
m4 <- felm(log(rate2) ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax |
             frdm + year + educ + ethnicity  + gender +
             begin + connection + seq | 0 | citycode + unified_firm_id, data = firm2)

# output regression table
stargazer(m1, m2, m3, m4, title = "Interjurisdictional Competition and Social Insurances (Multiway Clustering)", 
          dep.var.labels = c("Unemployment", "Unemployment", "Health and Pension", "Health and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("Firm Characteristics", "Y", "Y", "Y", "Y"), 
                           c("Firm Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))

# get rid of these firms with branches
firm <- firm %>% filter(is_duplicate_firm == FALSE)

# unemployment insurance
m1 <- felm(insurance_dummy ~ gdp_pressure + ihs(employment) + 
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

firm1 <- firm %>% filter(rate1 > 0)
m2 <- felm(log(rate1) ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm1)

# medical insurance
m3 <- felm(medical_dummy ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

firm2 <- firm %>% filter(rate2 > 0)
m4 <- felm(log(rate2) ~ gdp_pressure + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm2)

# output regression table
stargazer(m1, m2, m3, m4, title = "Interjurisdictional Competition and Social Insurances (No Firms with Branches)", 
          dep.var.labels = c("Unemployment", "Unemployment", "Health and Pension", "Health and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("Firm Characteristics", "Y", "Y", "Y", "Y"), 
                           c("Firm Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
